* Reset settings and initialize log file
launch, path("share/winter_sectors")

*-------------------------------------------------------------------------------
* Price and Wasserman (2024), "The Summer Drop in Female Employment"
*
* Description: Decompose the winter drop in male/female employment by sector.
*-------------------------------------------------------------------------------


* Prepare data and run models
*-------------------------------------------------------------------------------

if "$estimate" != "0" {
	* Load data on adult men
	gzuse "$basepath/data/derived/cps_bms_sample.dta.gz", clear

	* Retain variables we need
	keep pid tm year month wtraked tmspline* weeks female emp ind1990

	* Assign employed individuals to coarse sectoral groups
	gen byte sector = .
	replace sector = 1 if emp == 1 & inrange(ind1990, 010, 032) | ind1990 == 060
	replace sector = 2 if emp == 1 & inrange(ind1990, 500, 691)
	replace sector = 3 if emp == 1 & inrange(ind1990, 100, 432)
	replace sector = 4 if emp == 1 & missing(sector)

	label define sector_lbl 0 "Aggregate", replace
	label define sector_lbl 1 "Construction and agriculture", add
	label define sector_lbl 2 "Wholesale and retail trade", add
	label define sector_lbl 3 "Manufacturing and transportation", add
	label define sector_lbl 4 "All other sectors", add
	label values sector sector_lbl

	* Store a list of job types
	quietly levelsof sector, local(sectors)

	* Record lagged status
	gen l_emp = L.emp
	gen l_sector = L.sector

	* Restrict to linked observations
	keep if !missing(wtraked)

	* Measure employment (e), inflows (f), and outflows (s)
	gen byte e_j0 = emp
	gen byte f_j0 = (l_emp == 0 & emp == 1)
	gen byte s_j0 = (l_emp == 1 & emp == 0)

	* Distinguish employment/flows by sector
	foreach j in `sectors' {
		* Transitions between each job type and non-employment
		gen byte e_j`j' = e_j0 * (sector == `j')
		gen byte f_j`j' = f_j0 * (sector == `j')
		gen byte s_j`j' = s_j0 * (l_sector == `j')
	}

	* Aggregate flows across individuals
	gcollapse (mean) f*j* s*j* (first) month tmspline* weeks (rawsum) wtraked [pw = wtraked], by(female tm)
	quietly tsset female tm

	* Loop over sex
	foreach f of numlist 0 1 {
		* Loop over outcome variables
		foreach yvar of varlist f*j* s*j* {
			* Run the specification
			quietly ivreg2 `yvar' ib5.month D.tmspline* D.weeks if female == `f' [aw = wtraked], bw($bandwidth) robust small
			process_estimates, path("winter_sectors") model("f`f'_`yvar'")
		}
	}
}


* Create a dataset of estimates
*-------------------------------------------------------------------------------

if "$estimate" != "0" {
	* Load estimates into memory
	load_estimates, path("winter_sectors")

	* Prepare coefficient labels for each month
	make_coeflabels

	* Turn into a dataset
	foreach f of numlist 0 1 {
		foreach x in "f" "s" {
			foreach j in 0 `sectors' {
				* Load estimates
				quietly estimates restore f`f'_`x'_j`j'

				* Convert the estimates into a dataset
				matrix B = e(b)
				clear
				quietly svmat B, names(col)
				quietly gen byte g = `f'
				quietly gen byte j = `j'
				quietly gen yvar = "`x'"
				quietly reshape long coef, i(g j yvar) j(m)

				* Compute excess flows, relative to the average month
				quietly sum coef if inrange(m, 1, 12)
				quietly replace coef = coef - r(mean)
				rename coef value

				* Store for stacking
				tempfile f`f'_`x'_j`j'
				quietly save "`f`f'_`x'_j`j''"
			}
		}
	}

	* Stack all estimates
	clear
	foreach f of numlist 0 1 {
		foreach x in "f" "s" {
			foreach j in 0 `sectors' {
				append using "`f`f'_`x'_j`j''"
			}
		}
	}

	* Reshape
	reshape wide value, i(g j m) j(yvar) string
	rename value* *

	* Label variables
	label variable g "Gender (0 = men, 1 = women)"
	label variable j "Sector (0 = aggregate)"
	label variable m "Month (0 = May)"
	label variable f "Share finding employment (non-E => E)"
	label variable s "Share separating from employment (E => non-E)"

	* Save to disk
	order g j m f s
	save "$basepath/models/winter_sectors/coefficients.dta", replace
}


* Compute decomposition terms
*-------------------------------------------------------------------------------

* Load dataset of estimates
use "$basepath/models/winter_sectors/coefficients.dta", clear

* Calculate gross and net excess flows
bysort g j (m): gen net_epop = sum(100 * (f - s))
keep g j m net_epop

* Improve variable names
rename g female
rename j sector
rename m month_adj

* Normalize October to zero
bysort female sector (month_adj): egen base_epop = total(net_epop * (month_adj == 5))
replace net_epop = net_epop - base_epop
drop base_epop

* Reshape
reshape wide net_epop, i(female month_adj) j(sector)

rename net_epop1 agcon
rename net_epop2 trade
rename net_epop3 auxil
rename net_epop4 other

* Bar positioning
gen curr_pos = 0
gen curr_neg = 0

foreach v in "agcon" "trade" "auxil" "other" {
	gen `v'0 = .
	replace `v'0 = curr_pos if sign(`v') >= 0
	replace `v'0 = curr_neg if sign(`v') < 0

	gen `v'1 = `v'0 + `v'
	replace curr_pos = curr_pos + `v' if `v' > 0
	replace curr_neg = curr_neg + `v' if `v' < 0
}

drop curr_*

* Prepare background shading
gen rlow = -2
gen rupp = 0.5
gen rval = month_adj + 0.5

* Plot estimates
foreach f of numlist 0 1 {
	if `f' == 0 local flbl "Men"
	if `f' == 1 local flbl "Women"

	#delimit ;
	twoway
		(rarea rupp rlow rval if female == `f' & inrange(month_adj, 4, 12), color($ltgs) plotregion(margin(t=0 b=0 l=0)))
		(rbar agcon0 agcon1 month_adj if female == `f', color($col1) fcolor(*1.20) lcolor(black) lwidth(vthin))
		(rbar trade0 trade1 month_adj if female == `f', color($col2) fcolor(*0.8) lcolor(black) lwidth(vthin))
		(rbar auxil0 auxil1 month_adj if female == `f', color($col3) fcolor(*0.6) lcolor(black) lwidth(vthin))
		(rbar other0 other1 month_adj if female == `f', color($col4) fcolor(*0.6) lcolor(black) lwidth(vthin))
		(connected net_epop0 month_adj if female == `f', color(black) msymbol(circle) clwidth(medthick))
		(scatteri 0 -0.5 0 12.5, recast(line) color(black)),
		title("{bf:`flbl'}")
		xtitle("")
		xscale(range(0.5 12.5))
		xlabel(0 "May" 1 "June" 2 "July" 3 "Aug." 4 "Sept." 5 "Oct." 6 "Nov." 7 "Dec." 8 "Jan." 9 "Feb." 10 "Mar." 11 "Apr." 12 "May", alternate)
		ytitle("")
		yscale(range(-2 0.5) titlegap(*-8))
		ylabel(-2(.5)0.5)
		graphregion(margin(small))
		plotregion(margin(zero))
		legend(rows(3) size(*0.8) order(6 - 2 3 4 5) rowgap(*2.5)
			label(2 "Construction and agriculture")
			label(3 "Wholesale and retail trade")
			label(4 "Manufacturing and transportation")
			label(5 "All other sectors")
			label(6 "Overall"));
	#delimit cr

	tempfile g`f'
	graph save "`g`f''"
}

* Create pdf file
grc1leg "`g1'" "`g0'", l1title("    {&Delta} in EPOP relative to Oct. (p.p.)", size(*.8)) imargin(vsmall)
nicepdf "$basepath/output/winter_sectors.pdf", panels(2) cropbuffer(2) indirect replace

* Close the log file
unlaunch
